from sklearn.feature_extraction.text import CountVectorizer


def text_demo(data=None) -> None:
    """Demonstrate bag-of-words feature extraction with ``CountVectorizer``.

    Fits a ``CountVectorizer`` created with default settings on ``data`` and
    prints, in order: the sparse document-term matrix returned by
    ``fit_transform``, its dense array form, and the learned feature names.

    Args:
        data: Iterable of text documents to vectorize. When ``None`` (the
            default), a built-in pair of Chinese sample sentences is used.

    Returns:
        None. All results are written to standard output.
    """
    if data is None:
        # Default corpus. CountVectorizer performs no Chinese word
        # segmentation, so with its default token pattern each run of
        # characters between punctuation/whitespace is expected to become a
        # single feature rather than individual words.
        data = ["人生苦短，我喜欢Python", "生活太长久，我不喜欢Python"]

    # 1. Create the vectorizer with default settings.
    transfer = CountVectorizer()
    # 2. Learn the vocabulary and build the document-term count matrix.
    #    Kept under its own name so the input documents are not shadowed.
    counts = transfer.fit_transform(data)

    print(counts)
    print(counts.toarray())
    print(transfer.get_feature_names_out())


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    text_demo()